##########################################
########## Topic Validation ###############

# Data In: "topic_validation.csv"
          
# Data Out: 
# Figure A3

###########################################

#Load Packages
library(readr)
library(tidyverse)
library(lubridate)


# Set options for plotting
# A large `scipen` penalty biases R towards fixed rather than scientific
# notation when printing numbers.
options(scipen = 999999)

# Set working directory to replication folder (Refugee_Info_Replication)
#
# The script is expected to sit one directory below the replication folder
# (e.g. "Refugee_Info_Replication/code/"), so the parent of the script's own
# directory is used as the working directory.

# Obtain the full path of the current script in RStudio. Outside RStudio (or
# when rstudioapi is not installed) fall back to an empty path instead of
# aborting, so the script still runs when the working directory has been set
# manually.
script_path <- if (requireNamespace("rstudioapi", quietly = TRUE) &&
  rstudioapi::isAvailable()) {
  rstudioapi::getActiveDocumentContext()$path
} else {
  ""
}

# An unsaved document reports an empty string rather than NULL, so test for a
# non-empty path; otherwise setwd("") would fail.
if (!is.null(script_path) && isTRUE(nzchar(script_path))) {
  # The replication folder is the parent of the script's directory
  parent_directory <- dirname(dirname(script_path))

  # Set the working directory to the parent directory
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}
# Check Working Directory
getwd()

# Read in Topic Validation Data
data_scored <- read_csv("data/topic_validation.csv")

#############
# Figure A3 #
#############

# Proportion of relevant posts per topic: the sum of `score_clean` divided by
# the number of posts in the topic. Missing scores are dropped from the sum
# but still count in the denominator (n()), i.e. they are treated as not
# relevant. NOTE(review): presumably `score_clean` is a 0/1 indicator --
# confirm against the data.
#
# `topic_clean` keeps only the part of the topic label before the first ":"
# and is used as the x-axis label in the figure.
prop_relevant <- data_scored %>%
  group_by(topic) %>%
  summarise(prop = sum(score_clean, na.rm = TRUE) / n()) %>%
  mutate(topic_clean = str_split(topic, pattern = ":", simplify = TRUE)[, 1])

# Bar chart of the proportion of relevant posts per topic, with each bar
# labelled by its value rounded to two decimals. geom_col() draws bars whose
# heights are the values in `prop`. There is one bar per topic and no fill
# aesthetic, so no dodging or fill scale is needed.
figure_a3 <- ggplot(prop_relevant, aes(x = topic_clean, y = prop)) +
  geom_col(color = "black") +
  geom_text(aes(label = sprintf("%.2f", prop)), vjust = -0.25, size = 10) +
  theme_minimal(base_size = 22) +
  labs(y = "Proportion of Relevant Posts", x = "Topic")

# Display the figure when the script is run interactively
figure_a3

# Make sure the output folder exists, then save the plot explicitly rather
# than relying on whichever plot happened to be displayed last.
dir.create("plots", showWarnings = FALSE)
ggsave("plots/Figure_A3.pdf", plot = figure_a3, width = 20, height = 10)


